Data Vizualization
Setup
Load library
# SOURCE: https://cedricscherer.netlify.app/2019/05/17/the-evolution-of-a-ggplot-ep.-1/#aim
# Packages
required_packages <- c(
"tidyverse",
"rgdal",
"RColorBrewer",
"leaflet",
"rgeos",
"openxlsx",
"ggthemes",
"tidyverse"
)
for (pkg in required_packages) {
# install packages if not already present
if (!pkg %in% rownames(installed.packages())) {
install.packages(pkg)
}
# load packages to this current session
library(pkg, character.only = TRUE)
}Load datasets
library(tidyverse)
df_students <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-07/student_teacher_ratio.csv")
df_world_tile <- readr::read_csv("https://gist.githubusercontent.com/maartenzam/787498bbc07ae06b637447dbd430ea0a/raw/9a9dafafb44d8990f85243a9c7ca349acd3a0d07/worldtilegrid.csv") %>%
mutate(
## Namibias two-digit country code is handled as `NA` - let us fix that
alpha.2 = if_else(name == "Namibia", "NA", alpha.2),
## We are going to split "Americas" into "North America" and "Sout America"
region = if_else(region == "Americas", sub.region, region),
region = if_else(region %in% c("Northern America", "Central America", "Caribbean"),
"North America", region),
region = if_else(region == "Southern America", "South America", region),
## to join both data sets, we need a id column
country_code = alpha.3
)
df_ratios <- df_students %>%
## Let's keep only the most recent data per country
group_by(country, indicator) %>%
filter(year == max(year)) %>%
ungroup() %>%
# Create `NA`s for countries which do not have any data 2012-2018
complete(indicator, nesting(country, country_code)) %>%
## Let's focus on primary education and keep only countries (coded by letters)
filter(
indicator == "Primary Education",
str_detect(country_code, "[A-Z]")
) %>%
## merge with world tile map data
full_join(df_world_tile) %>%
filter(
!is.na(region),
!is.na(indicator)
) %>%
group_by(region) %>%
mutate(student_ratio_region = median(student_ratio, na.rm = T)) %>%
ungroup()First plots
p1<- ggplot(df_ratios, aes(x = region, y = student_ratio)) +
geom_boxplot()
p1# Sort the data
df_sorted <-
df_ratios %>%
mutate(region = fct_reorder(region, -student_ratio_region))
p2<- ggplot(df_sorted, aes(x = region, y = student_ratio)) +
geom_boxplot()
p2p2+ labs(title = "Student to teacher ratio",
subtitle = "Lastest available data",
y = "Student to teacher ratio",
x = "Region",
caption = "Source: UNESCO")p3<- ggplot(df_sorted, aes(x = region, y = student_ratio)) +
geom_boxplot() +
coord_flip() +
scale_y_continuous(limits = c(0, 90)) +
labs(title = "Student to teacher ratio",
subtitle = "Lastest available data",
y = "Student to teacher ratio",
x = "Region",
caption = "Source: UNESCO")
p3Change default plot settings
tema_enmisp<- theme(panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
legend.title=element_blank(),
axis.text = element_text(face = "bold", size = 8, color = "black"),
axis.title = element_text(size = 10),
plot.title = element_text(face = "bold", size = 12),
legend.position = "none",
plot.caption = element_text(size = 8, face = "italic")
)
p4<- ggplot(df_sorted, aes(x = region, y = student_ratio, color = region)) +
geom_jitter(size = 2, alpha = 0.25, width = 0.2) +
stat_summary(fun = mean, geom = "point", size = 5) +
#geom_boxplot() +
coord_flip() +
scale_y_continuous(limits = c(0, 90)) +
labs(title = "Student to teacher ratio",
subtitle = "Lastest available data",
y = "Student to teacher ratio",
x = "Region",
caption = "Source: UNESCO") +
theme_minimal() +
tema_enmisp
p4 world_avg <-
df_ratios %>%
summarize(avg = mean(student_ratio, na.rm = TRUE)) %>%
pull(avg)
p5 <- p4 +
geom_segment(
aes(x = region, xend = region,
y = world_avg, yend = student_ratio_region),
size = 0.8
) +
geom_hline(aes(yintercept = world_avg), color = "gray70", size = 0.6) +
geom_jitter(size = 2, alpha = 0.25, width = 0.2) +
stat_summary(fun = mean, geom = "point", size = 5)
p5 ## Final Plot
## coordinates for arrows
arrows <-
tibble(
x1 = c(6, 3.65, 1.8, 1.8, 1.8),
x2 = c(5.6, 4, 2.18, 2.76, 0.9),
y1 = c(world_avg + 6, 10.5, 9, 9, 77),
y2 = c(world_avg + 0.1, 18.4, 14.16, 12, 83.42)
)
p5 +
annotate(
"text", x = 6.3, y = 35,
size = 2.7, color = "gray20",
label = glue::glue("Worldwide average:\n{round(world_avg, 1)} students per teacher")
) +
annotate(
"text", x = 3.5, y = 10,
size = 2.7, color = "gray20",
label = "Continental average"
) +
annotate(
"text", x = 1.7, y = 11,
size = 2.7, color = "gray20",
label = "Countries per continent"
) +
annotate(
"text", x = 1.9, y = 64,
size = 2.7, color = "gray20",
label = "The Central African Republic has by far\nthe most students per teacher"
) +
geom_curve(
data = arrows, aes(x = x1, xend = x2,
y = y1, yend = y2),
arrow = arrow(length = unit(0.08, "inch")), size = 0.5,
color = "gray20", curvature = -0.3#
) Save plot
ggsave( filename = here::here("figures",
paste0(Sys.Date(), "_student_ratio", ".png")),
width = 20.49,
height = 10.3,
units = c("cm"),
dpi = "retina")Line plots
sns_epe <- readRDS("datasets/sns_epe.rds")
sns_sum_m <- readRDS("datasets/sns_sum_m.rds")
p6 <- ggplot(data = sns_epe,
mapping = aes(x = date,
y = arrears_sum,
colour = year)) +
geom_point(alpha = 0.5) +
geom_smooth(method = loess,
se = FALSE) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
geom_vline(xintercept=as.numeric(as.Date("2015-11-01")), colour="red") +
geom_text(aes(x=as.Date("2015-09-01"),
label="XXI Governo",y=700),
colour="black",
angle=90,
vjust = 1.2,
size = 2) +
geom_vline(xintercept=as.numeric(as.Date("2019-10-01")), colour="red") +
geom_text(aes(x=as.Date("2019-08-01"),
label="XXII Governo",y=700),
colour="black",
angle=90,
vjust = 1.2,
size = 2)+
labs(title = "Dívidas em atraso dos Hospitais EPE",
subtitle = "2014 - 2020",
color='Ano (linear)',
y = "Dívidas em milhões Euros",
x = "Ano") +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)) +
theme_minimal() +
tema_enmisp
p6
sns_sum_m$colour <- ifelse(sns_sum_m$arrear_m_var1 <= 0, "Negative","Positive")
sns_sum_m$hjust <- ifelse(sns_sum_m$arrear_m_var1 > 0, 1.3, -0.3)
p7 <- ggplot(data = sns_sum_m,
mapping = aes(x = date,
y = arrear_m_var1,
label="",
hjust=hjust,
na.rm = TRUE)) +
geom_bar(stat="identity",
position="identity",
aes(fill = colour)) +
# geom_text(aes(y=0,colour=colour)) +
scale_fill_manual(values=c(Positive="#00BFC4",
Negative="#F8766D")) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
labs(subtitle = "Variação mensal das dívidas ",
y = "Variação mensal",
x = "Ano",
caption = "Source: Portal da Transparencia SNS",
fill = "Arrears Var") +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)) +
theme_minimal() +
tema_enmisp
p7Patchwork
#install.packages("patchwork")
library(patchwork)
p8<- p6 / p7
p8Save plot
ggsave( filename = here::here("figures",
paste0(Sys.Date(), "_hospital_debt", ".png")),
width = 20.49,
height = 10.3,
units = c("cm"),
dpi = "retina")Create a Map
comm_pt <- readOGR(
dsn= "datasets/concelhos-shapefile" , ###
layer="concelhos",
verbose=FALSE,
use_iconv = TRUE, ###
encoding = "UTF-8" ###
)
# make the polygons a bit less verbose
comm_pt1 <- gSimplify(comm_pt, 0.01, topologyPreserve=TRUE)
comm_pt1 = SpatialPolygonsDataFrame(comm_pt1, data=comm_pt@data)
comm_pt1@data <- comm_pt1@data %>%
dplyr::select(
"ISO",
"ID_1",
"NAME_1",
"ID_2",
"NAME_2",
)
covid_inc <- read_csv("datasets/covid_inc.csv")
covid_inc <- covid_inc %>%
rename(NAME_2=Concelho) %>%
mutate( NAME_2=recode(NAME_2,
'Ponte de Sor' = "Ponte de Sôr"
))
comm_pt1@data<-left_join(comm_pt1@data,
covid_inc,
by = "NAME_2")
#comm_pt1@data<- comm_pt1@data %>%
# filter(
# !ARS %in% c("Açores", "Madeira")
# )mybins <- c(0,60,120,240,480,Inf)
mycolors <- c("#f7f7f7","#fee391","#fb6a4a","#cb181d","#67000d")
mypalette <- colorBin( palette=mycolors, domain=comm_pt1@data$Incidencia, na.color="transparent", bins=mybins)
# Prepare the text for tooltips:
mytext <- paste(
"Concelho: ", comm_pt1@data$NAME_2,"<br/>",
"Incidência: ", comm_pt1@data$Incidencia, "<br/>",
"Categoria: ", comm_pt1@data$Incidencia_descritivo,
sep="") %>%
lapply(htmltools::HTML)
# Final Map
leaflet(comm_pt1) %>%
addTiles() %>%
setView( lat=39.5, lng=-7.6, zoom=7) %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(
fillColor = ~mypalette(Incidencia),
stroke=TRUE,
fillOpacity = 0.9,
color="grey",
weight=0.3,
label = mytext,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "13px",
direction = "auto"
)
) %>%
addLegend( pal=mypalette, values=~Incidencia, opacity=0.9, title = "Incidência", position = "bottomleft" )library(viridis)
# I need to fortify the data AND keep trace of the commune code! (Takes ~2 minutes)
library(broom)
comm_pt1_fortified <- tidy(comm_pt1, region = "NAME_2")
ggplot() +
geom_polygon(data=comm_pt1_fortified,
aes(x=long,
y=lat,),
color="white",
size=.2) +
theme_void() +
coord_map() +
scale_fill_binned(type = "viridis", direction=-1) +
labs(title="Population in New York City",
subtitle="Neighborhoods are filled by population",
fill="Population")